Load libraries

root_path <- "~/Desktop/raw_data_must/"
chrom_path <- paste0(root_path, "Chromium/")
vis_path <- paste0(root_path, "Visium/outs/")
xe_path <- paste0(root_path, "Xenium/outs/")
aggxe_path <- paste0(root_path, "AggXe/outs/")

suppressMessages({
  library(Seurat)
  library(BayesSpace)
  library(ggplot2)
  library(patchwork)
  library(dplyr)
  library(tibble)
  library(scater)
  library(scran)
  library(scuttle)
  library(grid)
  library(hrbrthemes)
  library(gridExtra)
  library(SingleR)
  library(magick)
  library(harmony)
  library(scales)
  library(knitr)
})
source("utils_new.R")

Visium analysis

Read in Visium

Read in Visium Seurat object, and spatially visualize the per spot total count

vis_path <- paste0(root_path, "Visium/outs/")

vis <- Load10X_Spatial(vis_path)
Idents(vis) <- ""
SpatialFeaturePlot(vis, features = "nCount_Spatial") + theme(legend.position = "right")

# saveRDS(vis, "./intermediate_data/vis_raw.rds")

Visium QC

QC Visium with the guidance from OSTA book. Library size (sum in OSTA book, nCount_Spatial in Seurat), number of features detected (detected in OSTA book, nFeature_Spatial in Seurat), mitochondria percentage, and number of cells per spot need to be checked. However, in this breast cancer data, no ground truth of number of cells per spot is given like in the Human DLPFC data in spatialLIBD. Therefore, we skip the last criteria, and focusing on the first three.

Investigate the proper cut-off threshold. Note that Visium is not at single cell resolution, so the automated QC functionality developed for single-cell RNA-seq, such as scuttle::isOutlier(), should be carefully double-checked to prevent falsely removal of spots in spatial data.

Here we have a quick view of the distribution of the three variables of interest. We want to eliminate spots with low total library size, and low number of features detected per spot, and high mitochondria.

vis <- readRDS("./intermediate_data/vis_raw.rds")
# Prepare mitochondria percentage 
vis$percent.mt <- PercentageFeatureSet(vis, pattern = "^MT-")
VlnPlot(vis, features = c("nCount_Spatial", "nFeature_Spatial", "percent.mt"), pt.size = 0.1)

Per-spot QC

Check the lower end of nCount_Spatial, and set the threshold at 100 to visualize the effect of QC in plotQCSpatial_seu(). However, note that the removal is not necessary.

head(sort(vis$nCount_Spatial), 20)
## CCACGCCTGGAGAAGT-1 CGTGTAGACACCTTCC-1 CCAGCATCTACCGCTT-1 TGGCGACACTCGCATC-1 
##                 39                 54                 65                 77 
## TAGGTCTATGGATTAT-1 CTGTGCAACATTGAGG-1 CCGTACTGGACGCAGA-1 TCTGCAGTAGTTATGA-1 
##                108                134                161                187 
## GGTGCGTGGAGTTGGT-1 TCCGAGACAATACGCA-1 GATGGATCGTGGAGGT-1 CGTAGTCGGTTATCGA-1 
##                189                204                391                524 
## ACGCAAGGCGTCCAAT-1 CTCAATGACCGTGGTA-1 ACGAGGCTTAATCACG-1 AAGTCTACGCACTCGG-1 
##                569                629                637                680 
## TAACGAGCTATCCGTC-1 TCTGGTCACTATCTGT-1 ACGAAGTTCCAGCACC-1 GTCACTGTGATAACTC-1 
##                757                764                784                785
table(isOutlier(vis$nCount_Spatial, type = "lower"))
## 
## FALSE 
##  4992

Check the lower end of nFeature_Spatial, and set the threshold also at 100.

head(sort(vis$nFeature_Spatial), 20)
## CCACGCCTGGAGAAGT-1 CGTGTAGACACCTTCC-1 CCAGCATCTACCGCTT-1 TGGCGACACTCGCATC-1 
##                 38                 51                 65                 72 
## TAGGTCTATGGATTAT-1 CTGTGCAACATTGAGG-1 CCGTACTGGACGCAGA-1 GGTGCGTGGAGTTGGT-1 
##                104                125                148                169 
## TCTGCAGTAGTTATGA-1 TCCGAGACAATACGCA-1 GATGGATCGTGGAGGT-1 CGTAGTCGGTTATCGA-1 
##                178                192                352                448 
## ACGCAAGGCGTCCAAT-1 CTCAATGACCGTGGTA-1 ACGAGGCTTAATCACG-1 AAGTCTACGCACTCGG-1 
##                518                537                561                568 
## TCTGGTCACTATCTGT-1 ATGCTGACGACTCGCT-1 TAACGAGCTATCCGTC-1 TCGCCGGAAGGCAGGA-1 
##                603                629                640                643
table(isOutlier(vis$nFeature_Spatial, type = "lower"))
## 
## FALSE 
##  4992

Check the higher end of percent.mt, and no spot has unusually high mitochondria percentage.

tail(sort(vis$percent.mt), 20)
## GTCTGTGGATCTGACG-1 TCTAACAAGCCAATCC-1 TGCACCGGTCACCTCC-1 ATAGCACCTGCTGTCT-1 
##           12.82434           12.84427           12.86702           12.87049 
## TGGCGAGCTCCACGTG-1 TCGACCTGCAGCTCAG-1 CCTAATCAGGCCATTA-1 GAGCGGCAATGGAATA-1 
##           12.88375           12.92543           13.01404           13.02536 
## GCCGCTTCTAAGGTGA-1 TAGTGACTCGCTCCGC-1 ACGGTGTACGGTGCCA-1 ACGCAAGGCATAAGTC-1 
##           13.07899           13.12256           13.12948           13.22388 
## ATCGTAATCCACATGC-1 GGTTCTATACCGCTTG-1 AACATCTTAAGGCTCA-1 TACGGCCTTGTAGGAG-1 
##           13.39920           13.42821           13.62140           13.64523 
## CTCGGACTAACGTGGC-1 GCCATTGTAATGTCTT-1 GGACCATCACCGCCAA-1 AACCTGACAGTGCCGC-1 
##           13.65560           13.85249           13.96749           14.03253

Plot QC result. The helper function here is inspired by the Bioconductor function ggspavis::plotQC(spe), and here plotQCSpatial_seu() takes a Seurat object with “Spatial” assay and has spatial coordinates stored in its metadata.

vis$low_count_spots <- vis$nCount_Spatial < 100 | vis$nFeature_Spatial < 100
plotQCSpatial_seu(vis, flag = "low_count_spots")

Per-gene QC

Deriving low abundance gene flag. For genes, we require it to be detected in at least 20 spots. 66 genes will be removed.

vis_lowgenecount_drop <- rowSums(GetAssayData(vis, "counts") > 0) < 20
table(vis_lowgenecount_drop)
## vis_lowgenecount_drop
## FALSE  TRUE 
## 18019    66

Post QC subsetting

Eliminate genes and spots did not pass QC:

vis <- vis[!vis_lowgenecount_drop,  # low abundance genes
             !vis$low_count_spots ]     # low library size & number of detected genes per spot

dim(vis) # 18019  4988
## [1] 18019  4988
# saveRDS(vis, "./intermediate_data/vis_qcd.rds")

Visium normalization

Normalization wtih SCTransform, as recommended by Seurat framework.

# vis <- readRDS("./intermediate_data/vis_qcd.rds")
vis <- SCTransform(vis, assay = "Spatial", verbose = FALSE)

Visium dimension reduction and clusetering

Dimensionality reduction and clustering with Seurat.

vis <- FindVariableFeatures(vis, selection.method = "vst", nfeatures = 3000)
vis <- RunPCA(vis, npcs = 50, assay = "SCT", verbose = FALSE)
vis <- RunUMAP(vis, reduction = "pca", dims = 1:50)
vis <- FindNeighbors(vis, reduction = "pca", dims = 1:50)
vis <- FindClusters(vis, verbose = FALSE)

# saveRDS(vis, "./intermediate_data/vis_qcd_dimred.rds")

Visium spatially variable genes

Spatially variable genes (SVGs) identification based on spatial autocorrelation “Moran’s I” index. This step takes a long time to run, so for the sake of time, it is recommended to directly load in the intermediate data.

vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
# DO NOT RUN - TAKING TOO LONG
vis <- FindSpatiallyVariableFeatures(vis, assay = "SCT", features = VariableFeatures(vis), selection.method = "moransi")

vis_SVGs_6 = rownames(
  dplyr::slice_min(
    vis[["SCT"]]@meta.features,
    moransi.spatially.variable.rank,
    n = 6
  )
)

Visualize top 6 Visium SVGs in UMAP space.

vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
vis_SVGs_6 = c("DCAF7", "MT-ND1", "CSDE1", "TACO1", "MIEN1", "NRAS")
FeaturePlot(vis, features = vis_SVGs_6, ncol = 3, raster = FALSE)

Visualize top 6 Visium SVGs spatially. Combining results from deconvolution, we can see that genes highly expressed in invasive tumor region has high spatial variability.

markers <- vis_SVGs_6
feat.plots <- purrr::map(markers, function(x) SpatialFeaturePlot(vis, x))
patchwork::wrap_plots(feat.plots, ncol=3)

Visualize Seurat clustering result

Visium UMAP clustering colored by Seurat

vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
p1 <- DimPlot(vis) + ggtitle("Colored by Seurat Clustering")
p2 <- SpatialDimPlot(vis, label = TRUE, label.size = 3)
p1 + p2

Combining the information from deconvolution result later, we would realize that cluster 1 is likely invasive tumor.

Visium spatial clustering with BayesSpace

Spatial clustering with BayesSpace. First, we convert the Visium Seurat object to SingleCellExperiment object as required by BayesSpace.

We carry over everything we have calculated so far for Visium, except the spatial coordinates. Seurat::Load10X_Spatial() does not keep all the spatial columns required for BayesSpace (e.g. enhanced version), and it also changes the scaling of the tissue coordinates to match with H&E image, which is incompatible with BayesSpace. Instead of using SeuratObject::GetTissueCoordinates(vis), we read in the original spatial coordinates, with the following modification.

# First, convert Visium Seurat object to SCE required by BayesSpace
vis_mat <- vis@assays$Spatial@counts
vis_sct <- vis@assays$SCT@counts

vis_pca <- vis@reductions$pca@cell.embeddings
vis_umap <- vis@reductions$umap@cell.embeddings

# Merge in spatial coordinates from original tissue_position.csv.
vis_coord <- read.csv(paste0(vis_path, "spatial/tissue_positions_list.csv"), header = FALSE)
colnames(vis_coord) <- c("barcodes", "in_tissue", "row", "col", "pxl_row_in_fullres", "pxl_col_in_fullres")

vis_meta <- vis@meta.data
vis_meta$barcodes <- rownames(vis_meta)

vis_CD <- vis_meta %>%
  left_join(vis_coord)

vis_sce <- SingleCellExperiment(assays = list(counts = vis_mat, 
                                              SCTcounts = vis_sct), 
                                reducedDims = SimpleList(PCA = vis_pca, UMAP = vis_umap),
                                colData = vis_CD)

# saveRDS(vis_sce, "./intermediate_data/vis_qcd_dimred_sce.rds")

We assign the same number of clusters to BayesSpace identified by Seurat (n.cluster = 18), since we also get the confirmation of a similar number of cluster from Chromium (n.cluster = 19). This step takes a long time to run, so it’s recommended to run on HPC, and save the results.

# Just FYI, there is also a speed-up version of the original Bioconductor R package BayesSpace from GitHub, for the enhanced resolution of BayesSpace (which we won't cover in this tutorial)
# devtools::install_github(senbaikang/BayesSpace)
library(BayesSpace)

vis_sce <- readRDS("./intermediate_data/vis_qcd_dimred_sce.rds")

# DO NOT RUN - TAKING TOO LONG
# We assign the same number of clusters to BayesSpace identified by Seurat
vis_sce <- vis_sce %>% 
        spatialCluster(q = 18, use.dimred = "PCA", platform = "Visium", nrep = 10000)

# saveRDS(vis_sce, "./intermediate_data/vis_qcd_dimred_sce_bayesspace.rds")

Save Visium BayesSpace clustering result back to Visium Seurat object for plotting

vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
vis_sce <- readRDS("./intermediate_data/vis_qcd_dimred_sce_bayesspace.rds")

vis[["spatial.cluster"]] <- vis_sce$spatial.cluster

Visium BayesSpace spatial clustering result

Visualize UMAP and spatial plot of Visium, colored by clustering with Seurat and BayesSpace.

p3 <- DimPlot(vis, group.by = "spatial.cluster") + ggtitle("Colored by BayesSpace spatial clustering")
p4 <- SpatialDimPlot(vis, group.by = "spatial.cluster", label = TRUE, label.size = 3)
p3 + p4

We can see that BayesSpace recovers the structure of DCIS (cluster 1) better compares to Seurat clustering, which does not take into account the spatial coordinate information.

From here on, you can perform differential expression analysis between the clusters.

Visium cell-type deconvolution with Chromium reference

Build reference and spatialRNA objects

library(spacexr)
chrom <- readRDS("./intermediate_data/chrom_raw.rds")

# Reference Chromium, unnormalized counts
chrom_mat <- GetAssayData(chrom, slot = "counts")
cell_types <- as.factor(chrom$Annotation); names(cell_types) <- colnames(chrom)

ref <- Reference(chrom_mat, cell_types)

# Visium to be deconvolved, unnormalized counts
coords <- GetTissueCoordinates(vis)
counts <- GetAssayData(vis, slot = "counts")

puck <- SpatialRNA(coords, counts)

Run RCTD

We specify “full” mode here to indicate there can be many cells in a spot, and save RCTD results.

# DO NOT RUN - TAKING TOO LONG
myRCTD <- create.RCTD(puck, ref, max_cores = 4)
myRCTD <- run.RCTD(myRCTD, doublet_mode = 'full')
# saveRDS(myRCTD, "./intermediate_data/vis_RCTD.rds")

Organize cell-type deconvolution results, the weights should be normalized so the proportion of each cell type in a spot sums up to 1.

myRCTD <- readRDS("./intermediate_data/vis_RCTD.rds")
results <- myRCTD@results
norm_weights <- normalize_weights(results$weights)
cell_type_names <- myRCTD@cell_type_info$info[[2]]
spatialRNA <- myRCTD@spatialRNA
barcodes <- colnames(myRCTD@spatialRNA@counts)

Plot cell-type deconvolution

Plot cell-type deconvolution result. We first check the deconvolution result for one cell-type. Here we set the intensity range based on the probability score range of this cell-type across all spots.

# range(norm_weights[, cell_type_names[1]])
# 1.072845e-05 3.576271e-01

plot_puck_continuous(spatialRNA, barcodes, norm_weights[, cell_type_names[1]], title = cell_type_names[1], ylimit = c(0, 0.4), size = 0.8) + coord_flip() + scale_x_reverse()

We then separate the remaining cell types into two batches for the sake of clarity in visualization. All these plots have the color scale intensity ranging from 0 to 1. (Note the plot is stretched horizontally in the below limited plotting space, but the orientation matches the H&E spatial feature plot of Visium.)

p <- list()
for (i in 2:10){
  which_cell_type <- cell_type_names[i]
  p[[i-1]] <- plot_puck_continuous(spatialRNA, barcodes, norm_weights[, which_cell_type], title = which_cell_type) + coord_flip() + scale_x_reverse() + theme(legend.position = "none")
}
do.call(grid.arrange, p)

p <- list()
for (i in 11:19){
  which_cell_type <- cell_type_names[i]
  p[[i-10]] <- plot_puck_continuous(spatialRNA, barcodes, norm_weights[, which_cell_type], title = which_cell_type) + coord_flip() + scale_x_reverse() + theme(legend.position = "none")
}
do.call(grid.arrange, p)

In addition, you can save the plots with the commented code.

## Create a local directory for saved plots
# RCTD_result_path <- "./results/"
# for (i in 1:length(cell_type_names)){
#   which_cell_type <- cell_type_names[i]
#   plot_puck_continuous(puck, barcodes, norm_weights[, which_cell_type], title = which_cell_type) + coord_flip() + 
#     scale_x_reverse()
#   ggsave(path = RCTD_result_path, filename = paste0(which_cell_type, ".png"))
# }